# Install pacman on first use. requireNamespace() only checks that the package
# is available; it does not attach it, which is all that is needed because
# later calls are namespaced as pacman::p_load().
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
Loading required package: pacman
# Load every package used in this document, installing any that are missing.
pacman::p_load(
  tidyverse, ggridges, glue, scales, ggthemes,
  openintro, ggrepel, dsbox, janitor, fs
)

# set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# set width of code output
options(width = 85)

# set figure parameters for knitr
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)

# All responses are in comments within the code
1 - A new day, a new plot, a new geom
# read from dsbox pkg dataset
# glimpse(edibnb)

# Keep only listings that have both a neighbourhood and a review score, so
# every row can be placed on a ridge.
edibnb <- edibnb |>
  filter(!is.na(neighbourhood), !is.na(review_scores_rating))

# got the median review score per neighborhood, sorted from lowest to highest
median_rating <- edibnb |>
  group_by(neighbourhood) |>
  summarise(
    median_review_score = median(review_scores_rating, na.rm = TRUE)
  ) |>
  arrange(median_review_score)
# glimpse(median_rating)

# ordering: factor levels follow the ascending median order computed above
neighborhood_order <- edibnb |>
  mutate(
    neighbourhood = factor(neighbourhood, levels = median_rating$neighbourhood)
  )

# One density ridge of review scores per neighbourhood.
ggplot(
  neighborhood_order,
  aes(x = review_scores_rating, y = neighbourhood)
) +
  geom_density_ridges(alpha = 0.7) +
  theme(
    legend.position = "none",
    plot.subtitle = element_text(size = 8)
  ) +
  labs(
    title = "Airbnb Review of Edinburgh Neighborhood",
    x = "Review Score (0–100)",
    y = "Neighborhood",
    subtitle = "DensityRidge Plot"
  )
Picking joint bandwidth of 1.23
This plot shows the distribution of Airbnb review scores across different neighborhoods in Edinburgh. Each ridge represents a neighborhood and illustrates how review scores are spread out for listings in that area. The highest-scoring neighborhoods tend to have a “taller” distribution curve, implying a tighter spread of high review scores compared to the bottom few.
2 - Foreign Connected PACs
# get a list of files with "Foreign Connected PAC" in their names
list_of_files <- dir_ls(path = "data", regexp = "Foreign Connected PAC")

# read all files and bind; `id = "year"` stores each row's source file path
# in a column named `year`, which is converted to an integer year below
pac <- read_csv(list_of_files, id = "year")

# Clean the column names, split the combined country/company column, and
# convert the year and contribution columns to numeric types.
cleaned_pac <- clean_names(pac) |>
  separate(
    country_of_origin_parent_company,
    into = c("country_of_origin", "parent_company"),
    sep = "/",
    # Split on the first "/" only, so a company name that itself contains a
    # "/" is kept whole instead of being truncated with a warning.
    extra = "merge"
  ) |>
  mutate(
    # Take the four digits directly before ".csv" in the file path.
    # NOTE(review): assumes file names end in the cycle's closing year,
    # e.g. "...1999-2000.csv" -> 2000 -- confirm against the data folder.
    year = as.integer(str_extract(year, "\\d{4}(?=\\.csv$)")),
    # Strip the currency symbol and any thousands separators before
    # converting to numeric.
    repubs = as.numeric(str_remove_all(repubs, "[$,]")),
    dems = as.numeric(str_remove_all(dems, "[$,]"))
  ) |>
  select(-total)